In [2]:
import spacy
nlp = spacy.load('en_core_web_lg')

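Note: en_core_web_lg ships with static word vectors; the small model (en_core_web_sm) does not, so similarity scores from it are far less meaningful. If the large model is not installed yet, it can be fetched beforehand with python -m spacy download en_core_web_lg.
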
In [3]:
nlp(u'lion').vector


Out[3]:
array([ 1.8963e-01, -4.0309e-01,  3.5350e-01, -4.7907e-01, -4.3311e-01,
        2.3857e-01,  2.6962e-01,  6.4332e-02,  3.0767e-01,  1.3712e+00,
       -3.7582e-01, -2.2713e-01, -3.5657e-01, -2.5355e-01,  1.7543e-02,
        3.3962e-01,  7.4723e-02,  5.1226e-01, -3.9759e-01,  5.1333e-03,
       -3.0929e-01,  4.8911e-02, -1.8610e-01, -4.1702e-01, -8.1639e-01,
       -1.6908e-01, -2.6246e-01, -1.5983e-02,  1.2479e-01, -3.7276e-02,
       -5.7125e-01, -1.6296e-01,  1.2376e-01, -5.5464e-02,  1.3244e-01,
        2.7519e-02,  1.2592e-01, -3.2722e-01, -4.9165e-01, -3.5559e-01,
       -3.0630e-01,  6.1185e-02, -1.6932e-01, -6.2405e-02,  6.5763e-01,
       -2.7925e-01, -3.0450e-03, -2.2400e-02, -2.8015e-01, -2.1975e-01,
       -4.3188e-01,  3.9864e-02, -2.2102e-01, -4.2693e-02,  5.2748e-02,
        2.8726e-01,  1.2315e-01, -2.8662e-02,  7.8294e-02,  4.6754e-01,
       -2.4589e-01, -1.1064e-01,  7.2250e-02, -9.4980e-02, -2.7548e-01,
       -5.4097e-01,  1.2823e-01, -8.2408e-02,  3.1035e-01, -6.3394e-02,
       -7.3755e-01, -5.4992e-01,  9.9999e-02, -2.0758e-01, -3.9674e-02,
        2.0664e-01, -9.7557e-02, -3.7092e-01,  2.7901e-01, -6.2218e-01,
       -1.0280e-01,  2.3271e-01,  4.3838e-01,  3.2445e-02, -2.9866e-01,
       -7.3611e-02,  7.1594e-01,  1.4241e-01,  2.7770e-01, -3.9892e-01,
        3.6656e-02,  1.5759e-01,  8.2014e-02, -5.7343e-01,  3.5457e-01,
        2.2491e-01, -6.2699e-01, -8.8106e-02,  2.4361e-01,  3.8533e-01,
       -1.4083e-01,  1.7691e-01,  7.0897e-02,  1.7951e-01, -4.5907e-01,
       -8.2120e-01, -2.6631e-02,  6.2549e-02,  4.2415e-01, -8.9630e-02,
       -2.4654e-01,  1.4156e-01,  4.0187e-01, -4.1232e-01,  8.4516e-02,
       -1.0626e-01,  7.3145e-01,  1.9217e-01,  1.4240e-01,  2.8511e-01,
       -2.9454e-01, -2.1948e-01,  9.0460e-01, -1.9098e-01, -1.0340e+00,
       -1.5754e-01, -1.1964e-01,  4.9888e-01, -1.0624e+00, -3.2820e-01,
       -1.1232e-02, -7.9482e-01,  3.7275e-01, -6.8710e-03, -2.5772e-01,
       -4.7005e-01, -4.1387e-01, -6.4089e-02, -2.8033e-01, -4.0778e-02,
       -2.4866e+00,  6.2494e-03, -1.0210e-02,  1.2752e-01,  3.4965e-01,
       -1.2571e-01,  3.1570e-01,  4.1926e-01,  2.0056e-01, -5.5984e-01,
       -2.2801e-01,  1.2012e-01, -2.0518e-03, -8.9764e-02, -8.0373e-02,
        1.1969e-02, -2.6978e-01,  3.4829e-01,  7.3664e-03, -1.1137e-01,
        6.3410e-01,  3.8449e-01, -6.2248e-01,  4.1145e-02,  2.5922e-01,
        6.5811e-01, -4.9548e-01, -1.3030e-01, -3.8279e-01,  1.1156e-01,
       -4.3085e-01,  3.4473e-01,  2.7109e-02, -2.5108e-01, -2.8011e-01,
        2.1662e-01,  3.2660e-01,  5.5895e-02,  7.6077e-02, -5.2480e-02,
        4.5928e-02, -2.5266e-01,  5.2845e-01, -1.3145e-01, -1.2453e-01,
        4.0556e-01,  3.1877e-01,  2.4415e-02, -2.2620e-01, -6.1960e-01,
       -4.0886e-01, -3.5534e-02, -5.5123e-03,  2.3438e-01,  8.7854e-01,
       -2.5161e-01,  4.0600e-01, -4.4284e-01,  3.4934e-01, -5.6429e-01,
       -2.3676e-01,  6.2199e-01, -2.8175e-01,  4.2024e-01,  1.0043e-01,
       -1.4720e-01,  4.9593e-01, -3.5850e-01, -1.3998e-01, -2.7494e-01,
        2.3827e-01,  5.7268e-01,  7.9025e-02,  1.7872e-02, -2.1829e-01,
        5.5050e-02, -5.4200e-01,  1.6788e-01,  3.9065e-01,  3.0209e-01,
        2.3040e-01, -3.9351e-02, -2.1078e-01, -2.7224e-01,  1.6907e-01,
        5.4819e-01,  9.4888e-02,  7.9798e-01, -6.6158e-02,  1.9844e-01,
        2.0307e-01,  4.4808e-02, -1.0240e-01, -6.9909e-02, -3.6756e-02,
        9.5159e-02, -2.7830e-01, -1.0597e-01, -1.6276e-01, -1.8211e-01,
       -3.1897e-01, -2.1633e-01,  1.4994e-01, -7.2057e-02,  2.2264e-01,
       -4.5551e-01,  3.0341e-01,  1.8431e-01,  2.1681e-01, -3.1940e-01,
        2.6426e-01,  5.8106e-01,  5.4635e-02,  6.3238e-01,  4.3169e-01,
        9.0343e-02,  1.9494e-01,  3.5483e-01, -2.0706e-02, -7.3117e-01,
        1.2941e-01,  1.7418e-01, -1.5065e-01,  5.3355e-02,  4.4794e-02,
       -1.6600e-01,  2.2007e-01, -5.3970e-01, -2.4968e-01, -2.6464e-01,
       -5.5515e-01,  5.8242e-01,  2.2295e-01,  2.4433e-01,  4.5275e-01,
        3.4693e-01,  1.2255e-01, -3.9059e-02, -3.2749e-01, -2.7891e-01,
        1.3766e-01,  3.8392e-01,  1.0543e-03, -1.0242e-02,  4.9205e-01,
       -1.7922e-01,  4.1215e-02,  1.3547e-01, -2.0598e-01, -2.3194e-01,
       -7.7701e-01, -3.8237e-01, -7.6383e-01,  1.9418e-01, -1.5441e-01,
        8.9740e-01,  3.0626e-01,  4.0376e-01,  2.1738e-01, -3.8050e-01],
      dtype=float32)

In [4]:
len(nlp(u'lion').vector)


Out[4]:
300

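Each of those 300 dimensions comes from the model's static vector table. Words missing from that table fall back to an all-zero vector, which the token attributes has_vector, vector_norm and is_oov can reveal. A minimal sketch follows (the garbled last word is only there to force an out-of-vocabulary case):

In [ ]:
### check which tokens actually have a vector
doc = nlp(u'dog cat frblng')

for token in doc:
    print(token.text, token.has_vector, token.vector_norm, token.is_oov)
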
In [6]:
### vector output for a whole sentence

sents = nlp(u'The quick brown fox jumped over a lazy dog')

sents.vector


Out[6]:
array([-2.29145795e-01, -3.46055627e-03, -5.60528897e-02,  7.85977766e-02,
        9.12922155e-03,  1.10382795e-01, -1.26714230e-01, -7.90637732e-02,
        1.31968096e-01,  1.86862671e+00, -2.59036660e-01, -9.45902020e-02,
       -1.29910111e-01, -1.72616780e-01, -2.08401456e-01,  1.87006649e-02,
        8.01126659e-02,  9.91218865e-01,  2.73393337e-02, -2.94356763e-01,
       -1.45484447e-01, -9.41558629e-02, -5.07920086e-02, -1.69811353e-01,
        1.41727030e-01, -8.95571038e-02, -1.84179127e-01, -1.76457226e-01,
        1.65122882e-01, -2.20417902e-01, -1.91546515e-01,  2.51313895e-01,
        6.73556626e-02, -5.30913323e-02,  1.28895223e-01, -5.74297756e-02,
        7.14288801e-02, -1.10088736e-01, -8.49754438e-02, -1.26965329e-01,
        2.06004441e-01,  7.04980046e-02, -6.15093037e-02, -1.66130662e-01,
        1.48633450e-01,  4.71172146e-02, -1.81588233e-01,  5.00197560e-02,
        1.43082336e-01,  2.85028890e-02, -2.06958458e-01,  2.00484216e-01,
        5.18219313e-04,  2.13606693e-02, -8.42414424e-02,  6.82437792e-02,
        3.52840014e-02,  1.79329336e-01, -9.48552191e-02, -8.99764448e-02,
        6.93147480e-02,  1.17364325e-01,  2.81073321e-02,  9.52875614e-02,
        1.40404105e-01, -1.61580786e-01,  1.07955532e-02,  4.05195542e-02,
        5.06632291e-02,  6.59189969e-02,  1.82070043e-02,  8.42595622e-02,
        6.82728887e-02,  3.44687775e-02,  7.74068907e-02, -1.00541331e-01,
       -7.22397119e-02, -1.36631444e-01,  7.82399997e-03,  6.45366637e-03,
       -3.64300050e-02,  1.83236331e-01, -4.48207781e-02, -5.04070148e-02,
        1.28827030e-02, -2.76747774e-02,  4.84310776e-01,  2.93981403e-01,
        2.97116339e-01,  8.06249902e-02, -7.29233325e-02,  1.58352450e-01,
        3.50650884e-02, -1.50348455e-01, -5.20066619e-02,  5.10889962e-02,
       -1.08953886e-01, -3.26871127e-02,  1.82315543e-01, -1.76915884e-01,
        7.51234517e-02,  3.48288864e-02, -1.47218555e-01, -5.25834449e-02,
        8.49288851e-02, -7.30551064e-01,  2.56704897e-01, -7.00142235e-02,
        5.10252193e-02, -8.52812231e-02,  6.54618889e-02, -1.08713120e-01,
        1.79649666e-01, -1.94106445e-01,  3.34541090e-02,  1.53969109e-01,
       -1.62373334e-02,  2.23454423e-02,  1.26356453e-01,  8.19555596e-02,
        7.33288899e-02, -1.39666215e-01, -5.76875471e-02, -1.54199898e-01,
       -1.81703672e-01,  8.44638869e-02, -1.19984429e-02, -9.62789953e-02,
       -8.59419480e-02,  1.57964393e-03, -9.74553302e-02, -4.44773361e-02,
       -1.70016795e-01,  1.30081773e-01,  1.18517242e-01, -1.47740498e-01,
       -1.51207790e-01, -2.60798894e-02,  7.20583089e-03, -5.91398887e-02,
       -2.04430008e+00, -4.88525555e-02,  1.42633557e-01,  1.18846670e-01,
        1.86597764e-01, -1.98611796e-01,  3.69429886e-02, -2.92323399e-02,
        1.74508438e-01,  2.44701132e-02,  7.42663443e-02, -3.51392217e-02,
       -1.06225409e-04, -4.92834412e-02, -2.45728999e-01,  5.33285514e-02,
       -1.16909109e-01,  1.55293569e-01,  4.15556654e-02, -2.11775109e-01,
       -6.41415492e-02, -1.29796550e-01, -1.74706772e-01, -1.03955440e-01,
       -6.71750009e-02, -9.75381285e-02, -5.64488955e-02, -5.04329987e-02,
       -1.09255046e-01, -1.09185778e-01,  1.22292109e-01, -6.37411792e-03,
       -1.37798786e-01, -1.17282547e-01, -2.98780531e-01,  4.50878032e-03,
        1.51448250e-01,  4.59450036e-02, -3.90139967e-02,  1.08529776e-01,
       -3.31053324e-02, -2.43077219e-01, -2.40157679e-01, -2.18712226e-01,
       -9.24932957e-03,  1.09542698e-01, -1.14373304e-02,  5.49983308e-02,
        5.12482189e-02, -1.29175857e-01,  5.50087690e-02,  2.40794420e-02,
       -2.66746879e-02, -4.12909985e-02,  9.04988647e-02,  1.63671792e-01,
        1.63387001e-01,  3.43656763e-02,  4.32012156e-02, -7.35577792e-02,
       -1.93504453e-01,  5.87084442e-02, -2.94104666e-01,  2.24302337e-02,
        1.98929116e-01,  1.08914085e-01,  5.39495535e-02, -1.01477228e-01,
        6.67209327e-02, -1.59832574e-02, -4.15321141e-02,  1.00349993e-01,
       -8.82177874e-02,  1.12334676e-01,  9.95865688e-02,  1.47073328e-01,
       -2.19971225e-01,  1.68930113e-01, -9.82039869e-02,  1.46480769e-01,
        7.54283294e-02,  7.47743398e-02, -4.67192121e-02, -2.89261602e-02,
       -1.68541744e-01, -2.04066336e-01,  6.15419932e-02,  1.78478420e-01,
        6.53144531e-03,  2.50683784e-01, -1.80575773e-01,  7.89033324e-02,
        1.53888576e-02, -2.56483369e-02,  7.45135620e-02, -8.18474293e-02,
       -1.20121110e-02, -7.43027106e-02,  9.36226696e-02,  1.48760885e-01,
       -1.36678651e-01, -1.26202106e-01, -2.14737803e-02,  1.77118890e-02,
       -2.12611686e-02, -7.99791217e-02, -7.95553401e-02, -9.02217776e-02,
       -2.11785555e-01,  1.05856664e-01, -3.49383317e-02, -1.37948439e-01,
       -3.86459231e-02,  1.60381228e-01,  1.18056178e-01,  1.20334335e-01,
        1.32341668e-01,  1.83661059e-02, -4.36116830e-02, -1.62338875e-02,
        1.18117318e-01,  1.23674564e-01,  1.21527009e-01, -1.65413886e-01,
        2.28357241e-01, -1.64023101e-01,  1.39535554e-02, -1.18618160e-01,
       -8.61809924e-02,  4.60525565e-02,  6.05534203e-03,  1.59306556e-01,
       -4.25856374e-02, -1.23108894e-01, -3.38825025e-02, -3.24287675e-02,
        4.69944486e-03,  4.22910005e-02,  5.81285506e-02,  3.38969007e-02,
        1.62819222e-01,  9.96906757e-02, -8.49305466e-02,  1.44411981e-01,
        6.18494488e-02, -5.18973358e-02,  6.54534400e-02, -4.85357717e-02,
        5.73443016e-04,  1.92629937e-02, -1.51506230e-01,  5.55032529e-02,
       -2.19598915e-02, -1.37180611e-01, -3.97436693e-02,  1.23321891e-01,
        7.16196671e-02,  5.71780056e-02, -1.28691018e-01,  3.99866141e-03],
      dtype=float32)

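For a Doc (or Span), .vector defaults to the average of its token vectors, which is why the whole sentence above still comes out as a single 300-dimensional array. A quick sketch to confirm this with numpy:

In [ ]:
import numpy as np

doc = nlp(u'The quick brown fox jumped over a lazy dog')

# the document vector should match the element-wise mean of the token vectors
token_mean = np.mean([token.vector for token in doc], axis=0)
print(np.allclose(doc.vector, token_mean))
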
Identifying similar vectors


In [7]:
tokens = nlp(u'lion cat pets')

# pairwise similarity between every pair of tokens in the Doc
for token1 in tokens:
    for token2 in tokens:
        print(token1, token2, token1.similarity(token2))


lion lion 1.0
lion cat 0.5265437
lion pets 0.2913302
cat lion 0.5265437
cat cat 1.0
cat pets 0.64571816
pets lion 0.2913302
pets cat 0.64571816
pets pets 1.0

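The similarity() score is the cosine similarity between the two word vectors, so the numbers above can be reproduced directly from the vectors themselves. A minimal sketch using numpy, reusing the tokens Doc from the previous cell:

In [ ]:
import numpy as np

def cosine_similarity(u, v):
    # cosine of the angle between two vectors
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

lion, cat, pets = tokens
print(cosine_similarity(lion.vector, cat.vector))   # should agree with the 0.5265... printed above
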
In [10]:
### display the similarity scores as a Markdown table

a, b, c = tokens

from IPython.display import Markdown, display
display(Markdown(
    f'<table><tr><th></th><th>{a.text}</th><th>{b.text}</th><th>{c.text}</th></tr>'
    f'<tr><td>**{a.text}**</td><td>{a.similarity(a):.4}</td><td>{b.similarity(a):.4}</td><td>{c.similarity(a):.4}</td></tr>'
    f'<tr><td>**{b.text}**</td><td>{a.similarity(b):.4}</td><td>{b.similarity(b):.4}</td><td>{c.similarity(b):.4}</td></tr>'
    f'<tr><td>**{c.text}**</td><td>{a.similarity(c):.4}</td><td>{b.similarity(c):.4}</td><td>{c.similarity(c):.4}</td></tr></table>'
))


|          | lion   | cat    | pets   |
|----------|--------|--------|--------|
| **lion** | 1.0    | 0.5265 | 0.2913 |
| **cat**  | 0.5265 | 1.0    | 0.6457 |
| **pets** | 0.2913 | 0.6457 | 1.0    |


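The same matrix can be built with far less string plumbing; the sketch below uses pandas (an extra dependency not used elsewhere in this notebook) purely as an alternative way to display it.

In [ ]:
import pandas as pd

words = [a, b, c]
sim_matrix = [[w1.similarity(w2) for w2 in words] for w1 in words]
pd.DataFrame(sim_matrix,
             index=[w.text for w in words],
             columns=[w.text for w in words]).round(4)
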
In [19]:
nlp(u'lion').similarity(nlp(u'tiger'))


Out[19]:
0.7359829457249657

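Because a whole Doc also has a .vector (the average of its token vectors), similarity() works on phrases and sentences in exactly the same way. A small sketch with two arbitrary example sentences:

In [ ]:
doc1 = nlp(u'I like salty fries and hamburgers.')
doc2 = nlp(u'Fast food tastes very good.')

# document-level similarity: cosine similarity of the averaged vectors
print(doc1.similarity(doc2))
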